import pandas as pd
import numpy as np

class DataProcessor:
    """Cleaning and feature-engineering helpers for a pandas DataFrame.

    The methods are stateless; each takes a DataFrame and returns one.
    """

    def clean_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Drop rows that contain missing values or negative numeric values.

        A row is removed when any of its cells is NaN/None, or when any of
        its numeric columns holds a value below zero. Filtering with a single
        boolean mask (instead of overwriting negatives with NaN after the
        missing-value pass) guarantees the result contains no NaN and keeps
        the original column dtypes.

        Args:
            df: Input frame. It is not modified.

        Returns:
            A new DataFrame with the offending rows removed. The surviving
            rows keep their original index labels.
        """
        numeric = df.select_dtypes(include=np.number)
        has_missing = df.isna().any(axis=1)
        has_negative = (numeric < 0).any(axis=1)
        # .copy() so that later column assignments on the result never act
        # on a view of the caller's frame.
        return df.loc[~(has_missing | has_negative)].copy()

    def calculate_green_index(
        self,
        df: pd.DataFrame,
        gdp_col: str,
        tech_col: str,
        env_col: str,
        *,
        weights: tuple = (0.3, 0.5, 0.2),
    ) -> pd.DataFrame:
        """Add a ``GreenProductivityIndex`` column (example formula).

        The index is ``w_gdp * gdp + w_tech * tech - w_env * env``; the
        environment term is subtracted as a cost penalty.

        Args:
            df: Frame holding the three input columns. The new column is
                written onto this object in place.
            gdp_col: Name of the GDP column.
            tech_col: Name of the technology column.
            env_col: Name of the environmental-cost column.
            weights: ``(w_gdp, w_tech, w_env)``. Defaults to
                ``(0.3, 0.5, 0.2)``.

        Returns:
            The same ``df`` object, now with ``GreenProductivityIndex``.

        Raises:
            KeyError: If one of the named columns is missing.
            ValueError: If ``weights`` does not unpack to three values.
        """
        w_gdp, w_tech, w_env = weights
        df['GreenProductivityIndex'] = (
            w_gdp * df[gdp_col]
            + w_tech * df[tech_col]
            - w_env * df[env_col]  # environmental cost penalty term
        )
        return df

    def add_features(
        self,
        df: pd.DataFrame,
        time_col: str,
        *,
        ma_window: int = 5,
        yoy_periods: int = 12,
    ) -> pd.DataFrame:
        """Add time-series features: a moving average and a period change.

        The frame is sorted by ``time_col`` (parsed with ``pd.to_datetime``)
        and two columns are added, both derived from
        ``GreenProductivityIndex``:

        * ``MA_<ma_window>`` (``MA_5`` by default): rolling mean over
          ``ma_window`` rows.
        * ``YoY``: fractional change versus the row ``yoy_periods`` rows
          earlier. NOTE(review): this is a true year-over-year figure only
          if rows are evenly spaced with ``yoy_periods`` rows per year
          (e.g. monthly data with the default 12) - confirm with the data.

        Args:
            df: Frame containing ``time_col`` and ``GreenProductivityIndex``.
                It is not modified.
            time_col: Name of the column holding timestamps.
            ma_window: Number of rows in the moving-average window.
            yoy_periods: Row offset used for the percentage change.

        Returns:
            A new DataFrame sorted by ``time_col`` with the feature columns.

        Raises:
            KeyError: If ``GreenProductivityIndex`` or ``time_col`` is
                missing.
        """
        if 'GreenProductivityIndex' not in df.columns:
            raise KeyError(
                "GreenProductivityIndex column is missing; "
                "run calculate_green_index first"
            )
        # assign() builds a new frame, so the caller's time column is left
        # untouched instead of being converted in place.
        parsed = pd.to_datetime(df[time_col])
        df = df.assign(**{time_col: parsed}).sort_values(time_col)
        index_values = df['GreenProductivityIndex']
        df[f'MA_{ma_window}'] = index_values.rolling(window=ma_window).mean()
        df['YoY'] = index_values.pct_change(periods=yoy_periods)
        return df